- name: cni-cilium
  rules:
  # Fires when, for an agent pod (grouped by namespace and pod), the highest
  # cilium_unreachable_health_endpoints value stays above zero for 5 minutes.
  - alert: CiliumAgentUnreachableHealthEndpoints
    expr: max by (namespace, pod) (cilium_unreachable_health_endpoints) > 0
    for: 5m
    labels:
      d8_module: cni-cilium
      d8_component: agent
      severity_level: '4'
      tier: cluster
    annotations:
      plk_protocol_version: '1'
      plk_markup_format: markdown
      # Both plk grouping keys below carry the same CiliumAgentMalfunctioning selector.
      plk_grouped_by__main: CiliumAgentMalfunctioning,prometheus=deckhouse,tier=~tier,kubernetes=~kubernetes
      plk_create_group_if_not_exists__main: CiliumAgentMalfunctioning,prometheus=deckhouse,tier=~tier,kubernetes=~kubernetes
      summary: >-
        Some nodes' health endpoints are not reachable by agent {{ $labels.namespace }}/{{ $labels.pod }}.
      # The description points the operator at the logs of the affected pod.
      description: |
        Check what's going on: `kubectl -n {{ $labels.namespace }} logs {{ $labels.pod }}`

  # Fires when, for an agent pod (grouped by namespace and pod), fewer than
  # half of the endpoints counted by cilium_endpoint_state are in the "ready"
  # state for 5 minutes.
  - alert: CiliumAgentEndpointsNotReady
    # The numerator and denominator are aggregated separately and only then
    # divided. Dividing the raw series first matches them one-to-one on every
    # label, endpoint_state included, so the "ready" series would only ever be
    # divided by itself and the summed ratio could never drop below 0.5.
    # NOTE(review): if an agent exports no endpoint_state="ready" series at
    # all, the ratio is empty and the alert stays silent — confirm the metric
    # is always exported.
    expr: |
      (
        sum by (namespace, pod) (cilium_endpoint_state{endpoint_state="ready"})
        /
        sum by (namespace, pod) (cilium_endpoint_state)
      ) < 0.5
    for: 5m
    labels:
      d8_module: cni-cilium
      d8_component: agent
      severity_level: "4"
      tier: cluster
    annotations:
      plk_protocol_version: "1"
      plk_markup_format: "markdown"
      plk_grouped_by__main: CiliumAgentMalfunctioning,prometheus=deckhouse,tier=~tier,kubernetes=~kubernetes
      plk_create_group_if_not_exists__main: CiliumAgentMalfunctioning,prometheus=deckhouse,tier=~tier,kubernetes=~kubernetes
      summary: More than half of all known Endpoints are not ready in agent {{ $labels.namespace }}/{{ $labels.pod }}.
      # The description points the operator at the logs of the affected pod.
      description: |
        Check what's going on: `kubectl -n {{ $labels.namespace }} logs {{ $labels.pod }}`

  # Fires when a cilium_bpf_map_pressure series above 0.9 exists for a given
  # namespace, pod and map_name for 5 minutes.
  - alert: CiliumAgentMapPressureCritical
    # The comparison sits inside the aggregation, so only series above the
    # threshold are summed; the alert value is that sum, not a single reading.
    expr: sum by (namespace, pod, map_name) (cilium_bpf_map_pressure > 0.9)
    for: 5m
    labels:
      d8_module: cni-cilium
      d8_component: agent
      severity_level: '4'
      # Quoted so the label value stays a string rather than a YAML boolean.
      experimental: 'true'
      tier: cluster
    annotations:
      plk_protocol_version: '1'
      plk_markup_format: markdown
      # Both plk grouping keys below carry the same CiliumAgentMalfunctioning selector.
      plk_grouped_by__main: CiliumAgentMalfunctioning,prometheus=deckhouse,tier=~tier,kubernetes=~kubernetes
      plk_create_group_if_not_exists__main: CiliumAgentMalfunctioning,prometheus=deckhouse,tier=~tier,kubernetes=~kubernetes
      summary: >-
        eBPF map {{ $labels.map_name }} is more than 90% full in agent {{ $labels.namespace }}/{{ $labels.pod }}.
      description: >-
        We've reached resource limit of eBPF maps.
        Consult with vendor for possible remediation steps.

  # Fires when the 2-minute rate of cilium_policy_import_errors_total is above
  # zero for an agent pod (grouped by namespace and pod) for 5 minutes.
  - alert: CiliumAgentPolicyImportErrors
    # The comparison sits inside the aggregation, so only series with a
    # positive error rate contribute to the sum.
    expr: sum by (namespace, pod) (rate(cilium_policy_import_errors_total[2m]) > 0)
    for: 5m
    labels:
      d8_module: cni-cilium
      d8_component: agent
      severity_level: '4'
      tier: cluster
    annotations:
      plk_protocol_version: '1'
      plk_markup_format: markdown
      # Both plk grouping keys below carry the same CiliumAgentMalfunctioning selector.
      plk_grouped_by__main: CiliumAgentMalfunctioning,prometheus=deckhouse,tier=~tier,kubernetes=~kubernetes
      plk_create_group_if_not_exists__main: CiliumAgentMalfunctioning,prometheus=deckhouse,tier=~tier,kubernetes=~kubernetes
      summary: >-
        Agent {{ $labels.namespace }}/{{ $labels.pod }} fails to import policies.
      # The description points the operator at the logs of the affected pod.
      description: |
        Check what's going on: `kubectl -n {{ $labels.namespace }} logs {{ $labels.pod }}`
